#1. Import the required libraries and open the database connection. MySQL must already be installed on this machine.
from urllib import request

import requests
from bs4 import BeautifulSoup
import re
import pymysql

# -- Database setup -------------------------------------------------------
# NOTE(review): credentials are hard-coded in source; move them to an
# environment variable or config file before sharing/deploying this script.
connect=pymysql.connect(user='root',password='Kpds@512',host='localhost',port=3306,db='spiders',charset='utf8')
# 'conn' is actually a *cursor* (the connection is 'connect'); the name is
# kept because the scraping loop below references it.
conn=connect.cursor()

# BUG FIX: the original created the table and then immediately DROPped it,
# so the INSERTs in the loop below targeted a nonexistent table.  Correct
# order: drop any stale copy first, then create a fresh table.
# (The explicit "use spiders;" was redundant — db='spiders' is already
# selected by the connection above.)
conn.execute('drop table if exists spider_ts;')

sql = "CREATE TABLE IF NOT EXISTS spider_ts(Id Int Unsigned Auto_Increment,main_title text,key_world text,sub_title text,from_url Varchar(255),fbrq Varchar(20),Primary Key(`Id`));"
conn.execute(sql)


#2. Fetch each listing page: send a browser User-Agent, request the page
#   with requests.get(), and store the raw page text plus its URL in spider_ts.
# Hoisted out of the loop: the headers dict is loop-invariant.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; Touch; rv:11.0) like Gecko'}

for i in range(11,17):
    # NOTE(review): the trailing '/' after the catid value looks odd for a
    # query-string parameter — confirm the server actually expects it.
    link='http://www.eastwp.net/market/list.php?catid='+str(i)+'/'
    print(link)
    r = requests.get(link, headers=headers)
    print('The page is:',i)
    # BUG FIX: the original fetched the same URL a second time with
    # urllib.request.urlopen (without headers) and inserted the raw bytes
    # via %-string formatting — both SQL-injectable and wrong (it stored
    # the bytes repr, e.g. "b'...'").  Reuse the requests response text
    # and a parameterized query instead.
    ret = r.text
    print(ret)
    conn.execute(
        "insert into spider_ts(main_title,from_url) VALUES (%s,%s)",
        (ret, link),
    )
    # BUG FIX: commit() lives on the connection, not the cursor — the
    # original 'conn.commit()' raised AttributeError on a pymysql cursor.
    connect.commit()